Set up contains all the libraries needed to run the code, some important global variables, and preliminary calculations needed in our analysis.
# mcc codes
read_csv("https://raw.githubusercontent.com/greggles/mcc-codes/main/mcc_codes.csv") %>%
dplyr::select(
MCC = mcc,
label = edited_description
) %>%
saveRDS("rds/mcc_codes.rds")
# spending mcc only up to July (month 7)
spending_MCC %>%
group_by(year(date),month(date),MCC,label) %>%
summarize(
sum_trans = sum(as.numeric(transaction_counts), na.rm=T),
avg_doll_per_trans=mean(as.numeric(total_spent) / as.numeric(transaction_counts), na.rm=T)) %>%
saveRDS("baymap/rds/spending_MCC_year.rds")
spending_MCC %>%
group_by(year(date),wday(date),MCC,label) %>%
summarize(avg_doll_per_trans=mean(as.numeric(total_spent) / as.numeric(transaction_counts), na.rm=T)) %>%
saveRDS("baymap/rds/spending_MCC_day.rds")
# spending mcc by zip for 2020
spending_MCC %>%
filter(year(date) == 2020) %>%
group_by(month(date),zip,MCC,label) %>%
summarize(
avg_doll_per_trans=mean(as.numeric(total_spent) / as.numeric(transaction_counts), na.rm=T)) %>%
left_join(bay_zip %>% select(ZIP_CODE,PO_NAME,COUNTY),by=c("zip"="ZIP_CODE")) %>%
saveRDS("baymap/rds/spending_MCC_zip.rds")
# amazon + walmart reach - all of CA
az <-
read.csv("references/amazonfresh_zipcodes.csv",colClasses=c("character","character")) %>%
# filter(Zip %in% bay_zip$ZIP_CODE) %>%
# left_join(bay_zip, by=c("Zip"="ZIP_CODE")) %>%
left_join(ca_zip, by=c("Zip"="ZIP_CODE")) %>%
st_as_sf(crs=4326) %>%
mutate(area = st_area(.)) %>%
summarise(area=sum(area))
w <-
read.csv("references/walmart_delivery_stores_ca.csv") %>%
# filter(Zip.Code %in% bay_zip$ZIP_CODE) %>%
st_as_sf(coords=c("Longitude", "Latitude"), crs=4326)
wz <-
w %>%
st_transform(crs = CA_ALBERS) %>%
st_buffer(dist = 9 * 1609.344) %>%
st_transform(crs=4326) %>%
# st_intersection(bay_zip) %>%
st_intersection(ca_zip) %>%
mutate(area = st_area(.)) %>%
summarise(area=sum(area))
list("amz_zip"=az,"wmt"=w,"wmt_zip"=wz) %>% saveRDS('baymap/rds/amz_wmt_zip.rds')
az %>%
st_union(wz) %>%
saveRDS('rds/amz_wmt.rds')
# affected population
az_wz <- readRDS('rds/amz_wmt.rds')
az_wz_zip <- readRDS('baymap/rds/amz_wmt_zip.rds')
snap_county <-
read_excel("references/CalFresh Data/CFDashboardData.xlsx",skip=1) %>%
filter(Date == as.Date("2020-05-01")) %>%
select(County,`CalFresh Persons`) %>%
left_join(ca_counties, by=c("County"="NAME")) %>%
st_as_sf(crs=4326)
mapped <- function(x, data){
counties_union <-
ca_counties %>%
filter(NAME == x) %>%
st_cast("MULTIPOLYGON") %>%
st_intersection(st_make_valid(data))
if (nrow(counties_union)==0){
return(NA)
}
result <-
snap_county %>%
filter(County == x) %>%
st_cast("MULTIPOLYGON") %>%
.[,"CalFresh Persons"] %>%
st_interpolate_aw(counties_union , extensive=T) %>%
pull("CalFresh.Persons") %>%
round(digits=0)
return(result)
}
snap_county %>%
mutate(
# `Reached by Walmart` =
# snap_county$County %>%
# map(mapped,data=az_wz_zip$wmt_zip) %>%
# unlist(),
# `Reached by Amazon` =
# snap_county$County %>%
# map(mapped,data=az_wz_zip$amz_zip) %>%
# unlist(),
`Total Reached` =
snap_county$County %>%
map(mapped,data=az_wz) %>%
unlist(),
`% Reached` = round(100 * `Total Reached` / `CalFresh Persons`,1)
) %>%
as.data.frame() %>%
select(County, `CalFresh Persons`, `Total Reached`, `% Reached`) %>%
replace_na(list(`Total Reached`=0, `% Reached`=0)) %>%
# select(County, `CalFresh Persons`, `Reached by Amazon`, `Reached by Walmart`,`Total Reached`, `% Reached`) %>%
# replace_na(list(`Reached by Amazon`=0, `Reached by Walmart`=0,`Total Reached`=0, `% Reached`=0)) %>%
saveRDS('baymap/rds/amz_wmt_reach.rds')
get_trend <- function(data,name){
AMT <-
data[data$TOTAL_AMT > 0,] %>%
group_by(month(DATE), TRANSACTION_MODE) %>%
summarize(
AVG_AMT=mean(as.numeric(TOTAL_AMT), na.rm=T),
AVG_TRX_CNT=mean(as.numeric(TOTAL_TRX_CNT), na.rm=T),
AVG_AMT_PER_TRX = mean(as.numeric(TOTAL_AMT) / as.numeric(TOTAL_TRX_CNT), na.rm=T),
TOTAL_TRX_CNT=sum(as.numeric(TOTAL_TRX_CNT), na.rm=T)
)
GROCERY_AMT <-
data[data$TOTAL_GROCERY_SUPERMARKET_AMT > 0,] %>%
group_by(month(DATE), TRANSACTION_MODE) %>%
summarize(
AVG_GROCERY_AMT=mean(as.numeric(TOTAL_GROCERY_SUPERMARKET_AMT), na.rm=T),
TOTAL_GROCERY_TRX_CNT=sum(as.numeric(TOTAL_GROCERY_SUPERMARKET_TRX_CNT), na.rm=T),
AVG_GROCERY_AMT_PER_TRX = mean(as.numeric(TOTAL_GROCERY_SUPERMARKET_AMT) / as.numeric(TOTAL_GROCERY_SUPERMARKET_TRX_CNT), na.rm=T)
)
AMT %>%
left_join(GROCERY_AMT, by=c("month(DATE)","TRANSACTION_MODE")) %>%
mutate(
store=name
)
}
wmt_trend <- get_trend(wmt,"WMT")
amz_trend <- get_trend(amz,"AMZ")
wmt_trend %>%
rbind(amz_trend) %>%
replace_na(list(AVG_GROCERY_AMT=0,TOTAL_GROCERY_TRX_CNT=0,AVG_GROCERY_AMT_PER_TRX=0)) %>%
saveRDS("baymap/rds/amz_wmt_trend.rds")
wmt %>%
filter(year(as.Date(DATE)) == 2020) %>%
filter(TRANSACTION_MODE == "Online") %>%
mutate(store="WMT") %>%
group_by(CRD_HLDR_ZIP5) %>%
summarize(
TOTAL_TRX_CNT=sum(as.numeric(TOTAL_TRX_CNT), na.rm=T)) %>%
left_join(ca_zip,by=c("CRD_HLDR_ZIP5"="ZIP_CODE")) %>%
select(CRD_HLDR_ZIP5,PO_NAME,TOTAL_TRX_CNT,geometry) %>%
st_as_sf(crs=4326) %>%
saveRDS("baymap/rds/wmt_online_zip.rds")
Pink areas are Amazon Fresh delivery coverage by zip code provided by CAFB. Blue areas represent the 9 mile delivery radius from Walmart Grocery Pick-up locations. Overlapping these two geometries together, we can find the approximate SNAP population reached in each county through area-weighted interpolation. Some key insights are highlighted below:
We’d also like to note that this method works better with higher levels of granularity (e.g. census tracts or census block groups) or urban dense communities. Right now, we only have access to May 2020 SNAP participation on a county-level.
amz_wmt_zip <- readRDS('baymap/rds/amz_wmt_zip.rds')
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data = amz_wmt_zip$amz_zip ,
color = "#e851a2",
fillOpacity = 0.5,
weight=1,
group="Amazon"
) %>%
addPolygons(
data = amz_wmt_zip$wmt_zip,
color = "#56bedf",
fillOpacity = 0.5,
weight=1,
group="Walmart"
) %>%
addCircleMarkers(
data = amz_wmt_zip$wmt,
color = "black",
radius = 2,
label = ~Address,
group = "Walmart"
) %>%
addPolygons(
data = ca_counties,
fillOpacity = 0,
color = "black",
weight = 1.5,
dashArray = 3,
label = as.character(ca_counties$NAME),
labelOptions = labelOptions(style=list('font-weight'='bold','text-transform'='uppercase')),
group = "County Lines"
) %>%
addLayersControl(
overlayGroups = c("Amazon","Walmart","County Lines")
)
# table of reach
amz_wmt_reach <- readRDS('baymap/rds/amz_wmt_reach.rds')
# amz_wmt_reach[1,"Reached by Amazon"] <- sum(amz_wmt_reach$`Reached by Amazon`, na.rm=T)
# amz_wmt_reach[1,"Reached by Walmart"] <- sum(amz_wmt_reach$`Reached by Walmart`, na.rm=T)
amz_wmt_reach[1,"Total Reached"] <- sum(amz_wmt_reach$`Total Reached`, na.rm=T)
amz_wmt_reach[1,"% Reached"] <- round(100 * amz_wmt_reach[1,"Total Reached"] / amz_wmt_reach[1,"CalFresh Persons"],1)
knitr::kable(amz_wmt_reach)
| County | CalFresh Persons | Total Reached | % Reached |
|---|---|---|---|
| Statewide | 4727883 | 2029295 | 42.9 |
| Alameda | 134219 | 104212 | 77.6 |
| Alpine | 148 | 0 | 0.0 |
| Amador | 2932 | 0 | 0.0 |
| Butte | 32491 | 0 | 0.0 |
| Calaveras | 5590 | 0 | 0.0 |
| Colusa | 1934 | 0 | 0.0 |
| Contra Costa | 73309 | 64830 | 88.4 |
| Del Norte | 5902 | 0 | 0.0 |
| El Dorado | 13305 | 576 | 4.3 |
| Fresno | 286792 | 29902 | 10.4 |
| Glenn | 3634 | 0 | 0.0 |
| Humboldt | 24681 | 0 | 0.0 |
| Imperial | 41901 | 4028 | 9.6 |
| Inyo | 2196 | 0 | 0.0 |
| Kern | 172901 | 10483 | 6.1 |
| Kings | 26337 | 4692 | 17.8 |
| Lake | 13771 | 0 | 0.0 |
| Lassen | 3455 | 0 | 0.0 |
| Los Angeles | 1383112 | 859488 | 62.1 |
| Madera | 29751 | 1153 | 3.9 |
| Marin | 13345 | 111 | 0.8 |
| Mariposa | 2658 | 0 | 0.0 |
| Mendocino | 13321 | 0 | 0.0 |
| Merced | 56379 | 14497 | 25.7 |
| Modoc | 1491 | 0 | 0.0 |
| Mono | 1010 | 0 | 0.0 |
| Monterey | 46322 | 2 | 0.0 |
| Napa | 7542 | 1114 | 14.8 |
| Nevada | 9114 | 0 | 0.0 |
| Orange | 242909 | 242617 | 99.9 |
| Placer | 16886 | 2912 | 17.2 |
| Plumas | 2401 | 0 | 0.0 |
| Riverside | 282116 | 79354 | 28.1 |
| Sacramento | 227892 | 170036 | 74.6 |
| San Benito | 5444 | 164 | 3.0 |
| San Bernardino | 357205 | 30152 | 8.4 |
| San Diego | 331073 | 110303 | 33.3 |
| San Francisco | 79157 | 78466 | 99.1 |
| San Joaquin | 105222 | 33146 | 31.5 |
| San Luis Obispo | 19086 | 0 | 0.0 |
| San Mateo | 27782 | 14201 | 51.1 |
| Santa Barbara | 46093 | 4533 | 9.8 |
| Santa Clara | 96962 | 64650 | 66.7 |
| Santa Cruz | 28816 | 990 | 3.4 |
| Shasta | 26412 | 1746 | 6.6 |
| Sierra | 296 | 0 | 0.0 |
| Siskiyou | 7964 | 0 | 0.0 |
| Solano | 44165 | 32172 | 72.8 |
| Sonoma | 31184 | 342 | 1.1 |
| Stanislaus | 80723 | 24004 | 29.7 |
| Sutter | 12540 | 4080 | 32.5 |
| Tehama | 10541 | 0 | 0.0 |
| Trinity | 2068 | 0 | 0.0 |
| Tulare | 118467 | 6505 | 5.5 |
| Tuolumne | 5564 | 0 | 0.0 |
| Ventura | 72388 | 24831 | 34.3 |
| Yolo | 21870 | 5207 | 23.8 |
| Yuba | 15114 | 3796 | 25.1 |